public
Last active

DCT-II implementation using the vDSP on the iOS (and also OSX?)

  • Download Gist
dct.c
C
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
/* This implementation does DCT-II on iOS, using the iOS vDSP FFT methods. Supposedly this is faster than in plain C (i.e. FFTW), but I didn't do any tests to support this claim. For one thing, as far as I understand it, doing DCT takes less clock cycles than FFT, and using this code we do an FFT of size 2N for a DCT of size N (and then some post processing. Anyone is welcome to comment :)
 
This code was developed using the following 2 sources, and I owe great gratitude to their authors: http://developer.apple.com/library/ios/#documentation/Performance/Conceptual/vDSP_Programming_Guide/UsingFourierTransforms/UsingFourierTransforms.html and http://www.hydrogenaudio.org/forums//lofiversion/index.php/t39574.html
 
I don't claim I understand everything I do, but the thing is, it works -- as in, it gives me the same result as jtransform with scaling on :) And it's fast enough for my use: on the iPhone 4S I can get 320 DCTs of length 2048 per second. Perhaps I can improve this with some further tweaking (although I'm not sure I have to...)
 
Obviously the code below should be split between .c and .h file, just putting them all together here!
 
Note: This code does only 1D DCT. If you need 2D or 3D, this may be a good first step, but it's in no way done.
*/
 
#import <Accelerate/Accelerate.h>
#import <math.h>
 
struct dctII_setup_data {
FFTSetupD fft_setup;
double* dct_correction_factors;
vDSP_Length fft_log2length;
};
typedef struct dctII_setup_data dctII_setup_data;
 
#=======================================
 
dctII_setup_data dctII_setup(vDSP_Length dct_log2length, FFTRadix radix) {
vDSP_Length fft_log2length = dct_log2length + 1;
dctII_setup_data result;
result.fft_setup = vDSP_create_fftsetupD( fft_log2length, radix);
int dct_length = 1 << dct_log2length;
int fft_length = 1 << fft_log2length;
result.dct_correction_factors = (double*) malloc(sizeof(double) * fft_length);
for (int i=0;i<dct_length;i++) {
result.dct_correction_factors[2*i] = cos((M_PI*(dct_length-0.5)*i)/dct_length)/sqrt(dct_length*8.0);
result.dct_correction_factors[2*i+1] = sin((M_PI*(dct_length-0.5)*i)/dct_length)/sqrt(dct_length*8.0);
}
result.dct_correction_factors[0] = result.dct_correction_factors[0]/sqrt(2.0);
result.dct_correction_factors[1] = result.dct_correction_factors[1]/sqrt(2.0); //although this one is always 0 :)
result.fft_log2length = fft_log2length;
return result;
}
 
 
void dctII_1D(dctII_setup_data setup, double* ioData) {
vDSP_Length fft_length = 1 << setup.fft_log2length;
vDSP_Length dct_length = fft_length/2;
 
double* mirroreddata = malloc(sizeof(double)*fft_length);
for (int i=0;i<dct_length;i++) {
mirroreddata[i] = ioData[dct_length-i-1];
}
for (int i=dct_length; i<fft_length; i++) {
mirroreddata[i] = ioData[i-dct_length];
}
DSPDoubleSplitComplex* complexData = (DSPDoubleSplitComplex*)calloc(1, sizeof(DSPDoubleSplitComplex));
complexData->realp = malloc(sizeof(double) * dct_length);
complexData->imagp = malloc(sizeof(double) * dct_length);
vDSP_ctozD((DSPDoubleComplex*) mirroreddata, (vDSP_Stride) 2, complexData, (vDSP_Stride) 1, dct_length);
vDSP_fft_zripD( setup.fft_setup, complexData, (vDSP_Stride) 1, setup.fft_log2length, kFFTDirection_Forward);
vDSP_ztocD(complexData, (vDSP_Stride) 1, (DSPDoubleComplex *) mirroreddata, (vDSP_Stride) 2, dct_length);
vDSP_vmulD(mirroreddata, (vDSP_Stride) 1, setup.dct_correction_factors, (vDSP_Stride) 1, mirroreddata, (vDSP_Stride) 1, fft_length);
for (int i=0; i<dct_length; i++){
ioData[i] = mirroreddata[2*i]-mirroreddata[2*i+1];
}
 
free(complexData->realp);
free(complexData->imagp);
free(complexData);
free(mirroreddata);
}

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.