Skip to content

Instantly share code, notes, and snippets.

@pijyoi
Created October 1, 2017 08:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pijyoi/f49eb329353a63567e494b223572947a to your computer and use it in GitHub Desktop.
Save pijyoi/f49eb329353a63567e494b223572947a to your computer and use it in GitHub Desktop.
Program to time cuFFT for various FFT sizes
#include <assert.h>
#include <stdio.h>
#include <chrono>
#include <exception>
#include <string>
#include <vector>
#include <cufftXt.h>
// Reports a failed CUDA runtime call on stderr.
// Returns true when `err` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t err, const char *what)
{
    if (err == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(err));
    return false;
}

// Reports a failed cuFFT call on stderr.
// Returns true when `res` is CUFFT_SUCCESS, false otherwise.
static bool cufftOk(cufftResult res, const char *what)
{
    if (res == CUFFT_SUCCESS) {
        return true;
    }
    fprintf(stderr, "%s failed: cufftResult %d\n", what, static_cast<int>(res));
    return false;
}

// Times `niter` in-place forward C2C transforms of length `nfft`, batched so
// that the batch fills as much of the `nbytes`-byte buffer `d_data` as possible,
// and prints "<nfft> <usec per single transform> usec" on stdout.
//
// Returns true when the measurement was taken; false (with a message on
// stderr) when the size is unusable or any CUDA/cuFFT call failed. The plan
// and its work area are always released before returning.
static bool timeFftSize(int nfft, cuComplex *d_data, size_t nbytes, int niter)
{
    if (nfft <= 0) {
        // Guards the division below against zero / negative lengths.
        fprintf(stderr, "skipping FFT size %d: size must be positive\n", nfft);
        return false;
    }

    // compute batch size that uses up all of our buffer
    const size_t batchCount = nbytes / (sizeof(cuComplex) * static_cast<size_t>(nfft));
    if (batchCount == 0) {
        // A zero batch cannot be planned and would be used as a divisor below.
        fprintf(stderr, "skipping FFT size %d: one transform does not fit in %zu bytes\n",
                nfft, nbytes);
        return false;
    }
    // NOTE(review): the narrowing is safe for the 512 MiB buffer main() passes
    // (at most 2^26 transforms); revisit if the buffer grows past INT_MAX elements.
    const int nbatch = static_cast<int>(batchCount);

    cufftHandle plan_fft;
    if (!cufftOk(cufftCreate(&plan_fft), "cufftCreate")) {
        return false;
    }

    void *d_fftwork = nullptr;
    size_t workSize = 0;

    // Auto-allocation is turned off so the work area is allocated explicitly below.
    bool ok = cufftOk(cufftSetAutoAllocation(plan_fft, 0), "cufftSetAutoAllocation")
           && cufftOk(cufftMakePlanMany(plan_fft, 1, &nfft,
                                        nullptr, 0, 0, nullptr, 0, 0,
                                        CUFFT_C2C, nbatch, &workSize),
                      "cufftMakePlanMany");

    // Nothing to allocate or attach when the plan reports a zero-byte work area.
    if (ok && workSize > 0) {
        ok = cudaOk(cudaMalloc(&d_fftwork, workSize), "cudaMalloc(work area)")
          && cufftOk(cufftSetWorkArea(plan_fft, d_fftwork), "cufftSetWorkArea");
    }

    if (ok) {
        // Untimed warm-up run, in case the first execution carries one-time
        // initialization cost; the sync also drains any earlier queued work.
        ok = cufftOk(cufftExecC2C(plan_fft, d_data, d_data, CUFFT_FORWARD),
                     "cufftExecC2C (warm-up)")
          && cudaOk(cudaDeviceSynchronize(), "cudaDeviceSynchronize");
    }

    if (ok) {
        // steady_clock is monotonic, so wall-clock adjustments cannot skew the interval.
        const auto time_start = std::chrono::steady_clock::now();
        for (int idx = 0; idx < niter; idx++) {
            if (!cufftOk(cufftExecC2C(plan_fft, d_data, d_data, CUFFT_FORWARD),
                         "cufftExecC2C")) {
                ok = false;
                break;
            }
        }
        // Wait for the queued transforms to finish before stopping the clock.
        if (!cudaOk(cudaDeviceSynchronize(), "cudaDeviceSynchronize")) {
            ok = false;
        }
        const auto time_stop = std::chrono::steady_clock::now();

        if (ok) {
            const std::chrono::duration<double> elapsed = time_stop - time_start;
            printf("%d %f usec\n", nfft, elapsed.count() / nbatch / niter * 1e6);
        }
    }

    cufftDestroy(plan_fft);
    cudaFree(d_fftwork);  // no-op when no work area was allocated
    return ok;
}

// Entry point: every command-line argument is an FFT length to benchmark.
// Allocates one zero-filled 512 MiB device buffer, times each requested length
// against it, and returns 0 only if every length was measured successfully.
int main(int argc, char **argv)
{
    if (argc < 2) {
        fprintf(stderr, "usage: %s nfft [nfft ...]\n", argc > 0 ? argv[0] : "prog");
        return 1;
    }

    std::vector<int> vec_nfft;
    for (int k = 1; k < argc; k++) {
        try {
            vec_nfft.push_back(std::stoi(argv[k]));
        } catch (const std::exception &) {
            // std::stoi throws on non-numeric or out-of-range text.
            fprintf(stderr, "invalid FFT size '%s'\n", argv[k]);
            return 1;
        }
    }

    const int niter = 10;
    const size_t nbytes = 512*1048576;

    cuComplex *d_data = nullptr;
    if (!cudaOk(cudaMalloc((void **)&d_data, nbytes), "cudaMalloc(data)")) {
        return 1;
    }
    if (!cudaOk(cudaMemset(d_data, 0, nbytes), "cudaMemset(data)")) {
        cudaFree(d_data);
        return 1;
    }

    // Keep going after a failed size so the remaining sizes are still reported.
    bool all_ok = true;
    for (int nfft : vec_nfft) {
        if (!timeFftSize(nfft, d_data, nbytes, niter)) {
            all_ok = false;
        }
    }

    cudaFree(d_data);
    return all_ok ? 0 : 1;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment