Skip to content

Instantly share code, notes, and snippets.

@iccir
Last active September 2, 2019 10:07
Show Gist options
  • Save iccir/b92d60822d2259c6720c6a8fc5fdbf52 to your computer and use it in GitHub Desktop.
Save iccir/b92d60822d2259c6720c6a8fc5fdbf52 to your computer and use it in GitHub Desktop.
#include <math.h>
#include <Accelerate/Accelerate.h>
/* filter signal x with filter h and store the result in output.
* output must be at least as long as x
*/
void
fft_fir_filter( const float *x,
unsigned x_length,
const float *h,
unsigned h_length,
float *output)
{
// The length of the result from linear convolution is one less than the
// sum of the lengths of the two inputs.
unsigned result_length = x_length + h_length - 1;
unsigned overlap_length = result_length - x_length;
// Create buffer to store overflow across calls.
//
// iccir: This assumes that h_length doesn't change across calls.
// The overflow buffer would usually be passed in rather
// than being a static.
static float *overflow = NULL;
if (!overflow) {
overflow = malloc(sizeof(float) * (h_length - 1));
float zero = 0.0;
vDSP_vfill(&zero, overflow, 1, h_length - 1);
}
// You need to implement next_pow2, it should return the first power of 2
// greater than the argument passed to it.
unsigned fft_length = next_pow2(result_length);
// Create buffers to hold the zero-padded signal, filter kernel, and result.
float h_padded[fft_length];
float x_padded[fft_length];
float temp_result[fft_length];
// fill padded buffers with zeros
float zero = 0.0;
vDSP_vfill(&zero, h_padded, 1, fft_length);
vDSP_vfill(&zero, x_padded, 1, fft_length);
// Copy inputs into padded buffers
cblas_scopy(h_length, h, 1, h_padded, 1);
cblas_scopy(x_length, x, 1, x_padded, 1);
// The Accelerate FFT needs an initialized setup structure. This, like much
// of the above setup routine should be done outside of this function. I am
// putting it here for ease of demonstration. This only needs to happen once.
//
// iccir: A balancing call to vDSP_destroy_fftsetup() is needed to prevent a
// memory leak.
FFTSetup setup = vDSP_create_fftsetup(log2f((float)fft_length), FFT_RADIX2);
// Create complex buffers for holding the Fourier Transforms of h and x
// DSPSplitComplex holds pointers to two arrays of values, real, and imaginary.
// Each array should hold at least fft_length/2 samples.
DSPSplitComplex h_DFT;
DSPSplitComplex x_DFT;
// Create and assign the backing storage structures for each of these buffers.
// In your actual implementation, these should be allocated elsewhere and
// passed to this function along with the FFTSetup from above.
float h_real[fft_length/2];
float h_imag[fft_length/2];
h_DFT.realp = h_real;
h_DFT.imagp = h_imag;
float x_real[fft_length/2];
float x_imag[fft_length/2];
x_DFT.realp = x_real;
x_DFT.imagp = x_imag;
// Here we calculate the FFT of the filter kernel. This should be done once
// when you initialize your filter. As I mentioned previously, much of
// this setup routine should be done outside of this function and saved.
// Convert the real-valued filter kernel to split-complex form
// and store it in our DFT array.
vDSP_ctoz((DSPComplex*)h_padded, 2, &h_DFT, 1, (fft_length/2));
// Do in-place FFT of filter kernel
vDSP_fft_zrip(setup, &h_DFT, 1, log2f(fft_length), FFT_FORWARD);
// Calculate FFT of input signal...
// Convert the real-valued signal to split-complex form
// and store it in our DFT array.
vDSP_ctoz((DSPComplex*)x_padded, 2, &x_DFT, 1, (fft_length/2));
// Do in-place FFT of the input signal
vDSP_fft_zrip(setup, &x_DFT, 1, log2f(fft_length), FFT_FORWARD);
// This gets a bit strange. The vDSP FFT stores the real value at nyquist in the
// first element in the imaginary array. The first imaginary element is always
// zero, so no information is lost by doing this. The only issue is that we are
// going to use a complex vector multiply function from vDSP and it doesn't
// handle this format very well. We calculate this multiplication ourselves and
// add it into our result later.
// We'll need this later
float nyquist_multiplied = h_DFT.imagp[0] * x_DFT.imagp[0];
// Set the values in the arrays to 0 so they don't affect the multiplication
h_DFT.imagp[0] = 0.0;
x_DFT.imagp[0] = 0.0;
// Complex multiply x_DFT by h_DFT and store the result in x_DFT
vDSP_zvmul(&x_DFT, 1, &h_DFT, 1, &x_DFT,1, fft_length/2, 1);
// Now we put our hand-calculated nyquist value back
x_DFT.imagp[0] = nyquist_multiplied;
// Do the inverse FFT of our result
vDSP_fft_zrip(setup, &x_DFT, 1, log2f(fft_length), FFT_INVERSE);
// And convert split-complex format to real-valued
vDSP_ztoc(&x_DFT, 1, (DSPComplex*)temp_result, 2, fft_length/2);
// Now we have to scale our result by 1/(4*fft_length)
// (This is Apple's convention) to get our correct result.
float scale = (1.0 / (4.0 * fft_length) );
vDSP_vsmul(temp_result, 1, &scale, temp_result, 1, fft_length);
// The rest of our overlap handling is the same as in our previous
// implementation
// Add the overlap from the previous run
// use vDSP_vadd instead of loop
vDSP_vadd(temp_result, 1, overflow, 1, temp_result, 1, overlap_length);
// Copy overlap into overlap buffer
cblas_scopy(overlap_length, temp_result + x_length, 1, overflow, 1);
// write the final result to the output. use BLAS copy instead of loop
cblas_scopy(x_length, temp_result, 1, output, 1);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment