Skip to content

Instantly share code, notes, and snippets.

@hansenms
Last active August 29, 2015 14:12
Show Gist options
  • Save hansenms/ba3b85b15153fea5c9dc to your computer and use it in GitHub Desktop.
Save hansenms/ba3b85b15153fea5c9dc to your computer and use it in GitHub Desktop.
#include <fftw3.h>
#include <chrono>
#include <iostream>
#include <omp.h>
#include <vector>
int main(int argc, char ** argv){
using namespace std::chrono;
fftwf_complex *in, *out;
size_t n1 = 256;
size_t n2 = 256;
size_t n3 = 64;
size_t tot = n1*n2*n3;
size_t repeats = 1000;
in = (fftwf_complex*) fftw_malloc(sizeof(fftwf_complex)*tot);
out = (fftwf_complex*) fftw_malloc(sizeof(fftwf_complex)*tot);
const int n[2] = {n1,n2};
auto start1 = system_clock::now();
for (size_t i = 0; i < repeats; i++){
auto p_standard = fftwf_plan_dft_2d(n1,n2,in,out,FFTW_FORWARD,FFTW_ESTIMATE);
#pragma omp parallel for
for (size_t k = 0; k < n3; k++){
fftwf_execute_dft(p_standard,in+n1*n2*k,out+n1*n2*k);
}
}
auto end1 = system_clock::now();
std::cout << "OMP loop (warmup): " << duration_cast<milliseconds>(end1-start1).count() << std::endl;
start1 = system_clock::now();
for (size_t i = 0; i < repeats; i++){
auto p_standard = fftwf_plan_dft_2d(n1,n2,in,out,FFTW_FORWARD,FFTW_ESTIMATE);
#pragma omp parallel for
for (size_t k = 0; k < n3; k++){
fftwf_execute_dft(p_standard,in+n1*n2*k,out+n1*n2*k);
}
}
end1 = system_clock::now();
std::cout << "OMP loop (second run): " << duration_cast<milliseconds>(end1-start1).count() << std::endl;
fftwf_init_threads();
fftwf_plan_with_nthreads(omp_get_max_threads());
auto start = system_clock::now();
for (size_t i = 0; i < repeats; i++) {
auto p = fftwf_plan_many_dft(2,n,n3,in,n,1,n1*n2,out,n,1,n1*n2,FFTW_FORWARD,FFTW_ESTIMATE);
fftwf_execute(p);
}
auto end = system_clock::now();
std::cout << "Naive: " << duration_cast<milliseconds>(end-start).count() << std::endl;
start = system_clock::now();
for (size_t i=0; i < repeats; i++){
auto p = fftwf_plan_many_dft(2,n,n3,in,n,1,n1*n2,in,n,1,n1*n2,FFTW_FORWARD,FFTW_ESTIMATE);
fftwf_execute(p);
}
end = system_clock::now();
std::cout << "In place: " << duration_cast<milliseconds>(end-start).count() << std::endl;
start = system_clock::now();
for (size_t i = 0; i < repeats; i++){
auto p = fftwf_plan_many_dft(2,n,n3,in,n,1,n1*n2,out,n,1,n1*n2,FFTW_FORWARD,FFTW_MEASURE);
fftwf_execute(p);
}
end = system_clock::now();
std::cout << "Optimized: " << duration_cast<milliseconds>(end-start).count() << std::endl;
}
@hansenms
Copy link
Author

hansenms commented Jan 6, 2015

Compile with

g++ fft_test.cpp -fopenmp -std=c++11 -lfftw3f -lfftw3_omp -lfftw3f_omp  -lfftw3 -lm -O3

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment