Create a gist now

Instantly share code, notes, and snippets.

DCT-II implementation using the vDSP on the iOS (and also OSX?)
/* This implementation does DCT-II on iOS, using the iOS vDSP FFT methods. Supposedly this is faster than in plain C (i.e. FFTW), but I didn't do any tests to support this claim. For one thing, as far as I understand it, doing DCT takes less clock cycles than FFT, and using this code we do an FFT of size 2N for a DCT of size N (and then some post processing. Anyone is welcome to comment :)
This code was developed using the following 2 sources, and I owe great gratitude to their authors: and
I don't claim I understand everything I do, but the thing is, it works -- as in, it gives me the same result as jtransform with scaling on :) And it's fast enough for my use: on the iPhone 4S I can get 320 DCTs of length 2048 per second. Perhaps I can improve this with some further tweaking (although I'm not sure I have to...)
Obviously the code below should be split between .c and .h file, just putting them all together here!
Note: This code does only 1D DCT. If you need 2D or 3D, this may be a good first step, but it's in no way done.
#import <Accelerate/Accelerate.h>
#import <math.h>
struct dctII_setup_data {
FFTSetupD fft_setup;
double* dct_correction_factors;
vDSP_Length fft_log2length;
typedef struct dctII_setup_data dctII_setup_data;
dctII_setup_data dctII_setup(vDSP_Length dct_log2length, FFTRadix radix) {
vDSP_Length fft_log2length = dct_log2length + 1;
dctII_setup_data result;
result.fft_setup = vDSP_create_fftsetupD( fft_log2length, radix);
int dct_length = 1 << dct_log2length;
int fft_length = 1 << fft_log2length;
result.dct_correction_factors = (double*) malloc(sizeof(double) * fft_length);
for (int i=0;i<dct_length;i++) {
result.dct_correction_factors[2*i] = cos((M_PI*(dct_length-0.5)*i)/dct_length)/sqrt(dct_length*8.0);
result.dct_correction_factors[2*i+1] = sin((M_PI*(dct_length-0.5)*i)/dct_length)/sqrt(dct_length*8.0);
result.dct_correction_factors[0] = result.dct_correction_factors[0]/sqrt(2.0);
result.dct_correction_factors[1] = result.dct_correction_factors[1]/sqrt(2.0); //although this one is always 0 :)
result.fft_log2length = fft_log2length;
return result;
void dctII_1D(dctII_setup_data setup, double* ioData) {
vDSP_Length fft_length = 1 << setup.fft_log2length;
vDSP_Length dct_length = fft_length/2;
double* mirroreddata = malloc(sizeof(double)*fft_length);
for (int i=0;i<dct_length;i++) {
mirroreddata[i] = ioData[dct_length-i-1];
for (int i=dct_length; i<fft_length; i++) {
mirroreddata[i] = ioData[i-dct_length];
DSPDoubleSplitComplex* complexData = (DSPDoubleSplitComplex*)calloc(1, sizeof(DSPDoubleSplitComplex));
complexData->realp = malloc(sizeof(double) * dct_length);
complexData->imagp = malloc(sizeof(double) * dct_length);
vDSP_ctozD((DSPDoubleComplex*) mirroreddata, (vDSP_Stride) 2, complexData, (vDSP_Stride) 1, dct_length);
vDSP_fft_zripD( setup.fft_setup, complexData, (vDSP_Stride) 1, setup.fft_log2length, kFFTDirection_Forward);
vDSP_ztocD(complexData, (vDSP_Stride) 1, (DSPDoubleComplex *) mirroreddata, (vDSP_Stride) 2, dct_length);
vDSP_vmulD(mirroreddata, (vDSP_Stride) 1, setup.dct_correction_factors, (vDSP_Stride) 1, mirroreddata, (vDSP_Stride) 1, fft_length);
for (int i=0; i<dct_length; i++){
ioData[i] = mirroreddata[2*i]-mirroreddata[2*i+1];
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment