Skip to content

Instantly share code, notes, and snippets.

@al42and
Created April 25, 2023 14:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save al42and/c5b1cf3afe261585102971579c851e42 to your computer and use it in GitHub Desktop.
Save al42and/c5b1cf3afe261585102971579c851e42 to your computer and use it in GitHub Desktop.
VkFFT example failing with current "develop" branch
#include <cmath>
#include <cstdlib>
#include <exception>
#include <iostream>
#include <string>
#include <vector>
#include "vkFFT.h"
// Host-side sample values used as the input of the FFT round trip in main().
// The array holds 500 floats; main() uploads only the first sizeInReals of
// them and later compares the round-tripped result against this same array,
// addressing it in rows of realGridSizePadded[ZZ] elements.
const float inputdata[500] = {
-3.5, 6.3, 1.2, 0.3, 1.1, -5.7, 5.8, -1.9, -6.3, -1.4, 7.4, 2.4,
-9.9, -7.2, 5.4, 6.1, -1.9, -7.6, 1.4, -3.5, 0.7, 5.6, -4.2, -1.1,
-4.4, -6.3, -7.2, 4.6, -3.0, -0.9, 7.2, 2.5, -3.6, 6.1, -3.2, -2.1,
6.5, -0.4, -9.0, 2.3, 8.4, 4.0, -5.2, -9.0, 4.7, -3.7, -2.0, -9.5,
-3.9, -3.6, 7.1, 0.8, -0.6, 5.2, -9.3, -4.5, 5.9, 2.2, -5.8, 5.0,
1.2, -0.1, 2.2, 0.2, -7.7, 1.9, -8.4, 4.4, 2.3, -2.9, 6.7, 2.7,
5.8, -3.6, 8.9, 8.9, 4.3, 9.1, 9.3, -8.7, 4.1, 9.6, -6.2, 6.6,
-9.3, 8.2, 4.5, 6.2, 9.4, -8.0, -6.8, -3.3, 7.2, 1.7, 0.6, -4.9,
9.8, 1.3, 3.2, -0.2, 9.9, 4.4, -9.9, -7.2, 4.4, 4.7, 7.2, -0.3,
0.3, -2.1, 8.4, -2.1, -6.1, 4.1, -5.9, -2.2, -3.8, 5.2, -8.2, -7.8,
-8.8, 6.7, -9.5, -4.2, 0.8, 8.3, 5.2, -9.0, 8.7, 9.8, -9.9, -7.8,
-8.3, 9.0, -2.8, -9.2, -9.6, 8.4, 2.5, 6.0, -0.4, 1.3, -0.5, 9.1,
-9.5, -0.8, 1.9, -6.2, 4.3, -3.8, 8.6, -1.9, -2.1, -0.4, -7.1, -3.7,
9.1, -6.4, -0.6, 2.5, 8.0, -5.2, -9.8, -4.3, 4.5, 1.7, 9.3, 9.2,
1.0, 5.3, -4.5, 6.4, -6.6, 3.1, -6.8, 2.1, 2.0, 7.3, 8.6, 5.0,
5.2, 0.4, -7.1, 4.5, -9.2, -9.1, 0.2, -6.3, -1.1, -9.6, 7.4, -3.7,
-5.5, 2.6, -3.5, -0.7, 9.0, 9.8, -8.0, 3.6, 3.0, -2.2, -2.8, 0.8,
9.0, 2.8, 7.7, -0.7, -5.0, -1.8, -2.3, -0.4, -6.2, -9.1, -9.2, 0.5,
5.7, -3.9, 2.1, 0.6, 0.4, 9.1, 7.4, 7.1, -2.5, 7.3, 7.8, -4.3,
6.3, -0.8, -3.8, -1.5, 6.6, 2.3, 3.9, -4.6, 5.8, -7.4, 5.9, 2.8,
4.7, 3.9, -5.4, 9.1, -1.6, -1.9, -4.2, -2.6, 0.6, -5.1, 1.8, 5.2,
4.0, -6.2, 6.5, -9.1, 0.5, 2.1, 7.1, -8.6, 7.6, -9.7, -4.6, -5.7,
6.1, -1.8, -7.3, 9.4, 8.0, -2.6, -1.8, 5.7, 9.3, -7.9, 7.4, 6.3,
2.0, 9.6, -4.5, -6.2, 6.1, 2.3, 0.8, 5.9, -2.8, -3.5, -1.5, 6.0,
-4.9, 3.5, 7.7, -4.2, -9.7, 2.4, 8.1, 5.9, 3.4, -7.5, 7.5, 2.6,
4.7, 2.7, 2.2, 2.6, 6.2, 7.5, 0.2, -6.4, -2.8, -0.5, -0.3, 0.4,
1.2, 3.5, -4.0, -0.5, 9.3, -7.2, 8.5, -5.5, -1.7, -5.3, 0.3, 3.9,
-3.6, -3.6, 4.7, -8.1, 1.4, 4.0, 1.3, -4.3, -8.8, -7.3, 6.3, -7.5,
-9.0, 9.1, 4.5, -1.9, 1.9, 9.9, -1.7, -9.1, -5.1, 8.5, -9.3, 2.1,
-5.8, -3.6, -0.8, -0.9, -3.3, -2.7, 7.0, -7.2, -5.0, 7.4, -1.4, 0.0,
-4.5, -9.7, 0.7, -1.0, -9.1, -5.3, 4.3, 3.4, -6.6, 9.8, -1.1, 8.9,
5.0, 2.9, 0.2, -2.9, 0.8, 6.7, -0.6, 0.6, 4.1, 5.3, -1.7, -0.3,
4.2, 3.7, -8.3, 4.0, 1.3, 6.3, 0.2, 1.3, -1.1, -3.5, 2.8, -7.7,
6.2, -4.9, -9.9, 9.6, 3.0, -9.2, -8.0, -3.9, 7.9, -6.1, 6.0, 5.9,
9.6, 1.2, 6.2, 3.6, 2.1, 5.8, 9.2, -8.8, 8.8, -3.3, -9.2, 4.6,
1.8, 4.6, 2.9, -2.7, 4.2, 7.3, -0.4, 7.7, -7.0, 2.1, 0.3, 3.7,
3.3, -8.6, 9.8, 3.6, 3.1, 6.5, -2.4, 7.8, 7.5, 8.4, -2.8, -6.3,
-5.1, -2.7, 9.3, -0.8, -9.2, 7.9, 8.9, 3.4, 0.1, -5.3, -6.8, 4.9,
4.3, -0.7, -2.2, -3.2, -7.5, -2.3, 0.0, 8.1, -9.2, -2.3, -5.7, 2.1,
2.6, 2.0, 0.3, -8.0, -2.0, -7.9, 6.6, 8.4, 4.0, -6.2, -6.9, -7.2,
7.7, -5.0, 5.3, 1.9, -5.3, -7.5, 8.8, 8.3, 9.0, 8.1, 3.2, 1.2,
-5.4, -0.2, 2.1, -5.2, 9.5, 5.9, 5.6, -7.8,
};
/// Checks the outcome of a VkFFT call.
///
/// @param result  Status code returned by the VkFFT call.
/// @param msg     Context label printed in front of the error text.
/// @throws std::exception when @p result is anything other than VKFFT_SUCCESS;
///         the label and the VkFFT error string are written to stderr first.
void handleFftError(VkFFTResult result, const std::string &msg) {
    const bool callFailed = (result != VKFFT_SUCCESS);
    if (callFailed) {
        std::cerr << msg << ": " << getVkFFTErrorString(result) << std::endl;
        throw std::exception{};
    }
}
/// Checks the outcome of a HIP runtime call.
///
/// @param error  Status code returned by the HIP call.
/// @throws std::exception when @p error is anything other than hipSuccess;
///         the HIP error name is written to stderr first.
void handleHipError(hipError_t error) {
    const bool callFailed = (error != hipSuccess);
    if (callFailed) {
        std::cerr << hipGetErrorName(error) << std::endl;
        throw std::exception{};
    }
}
// Number of grid dimensions and the index of each axis inside an ivec.
#define DIM 3
#define XX 0
#define YY 1
#define ZZ 2
// Fixed-size integer triple indexed by XX / YY / ZZ.
using ivec = int[DIM];
// Extent of the real-valued grid along each axis.
const ivec realGridSize = {5, 5, 10};
// Real grid with the z extent padded to twice the complex z extent.
const ivec realGridSizePadded = {realGridSize[XX], realGridSize[YY], 2 * (realGridSize[ZZ] / 2 + 1)};
// Complex grid: same x/y extents, z extent of realGridSize[ZZ] / 2 + 1.
const ivec complexGridSizePadded = {realGridSize[XX], realGridSize[YY], realGridSize[ZZ] / 2 + 1};
// Number of complex elements in the padded complex grid.
const int size = complexGridSizePadded[XX] * complexGridSizePadded[YY] * complexGridSizePadded[ZZ];
// The same storage expressed as a count of real (float) values.
const int sizeInReals = 2 * size;
constexpr bool sc_performOutOfPlaceFFT = false;
int main() {
hipDevice_t queue_device_ = 0;
hipSetDevice(queue_device_);
hipStream_t stream;
hipStreamCreate(&stream);
// Setup VkFFT plan
VkFFTConfiguration configuration_ = {};
VkFFTApplication application_ = {};
VkFFTLaunchParams launchParams = {};
uint64_t bufferSize_;
uint64_t inputBufferSize_;
configuration_.FFTdim = 3;
configuration_.size[0] = realGridSize[ZZ];
configuration_.size[1] = realGridSize[YY];
configuration_.size[2] = realGridSize[XX];
configuration_.performR2C = 1;
configuration_.device = &queue_device_;
configuration_.num_streams = 1;
bufferSize_ = sizeInReals * sizeof(float);
configuration_.bufferSize = &bufferSize_;
configuration_.aimThreads = 64; // Tuned for AMD GCN architecture
configuration_.bufferStride[0] = complexGridSizePadded[ZZ];
configuration_.bufferStride[1] = complexGridSizePadded[ZZ] * complexGridSizePadded[YY];
configuration_.bufferStride[2] = complexGridSizePadded[ZZ] *
complexGridSizePadded[YY] *
complexGridSizePadded[XX];
configuration_.isInputFormatted = 1;
configuration_.inverseReturnToInputBuffer = 1;
inputBufferSize_ = realGridSizePadded[XX] * realGridSizePadded[YY] *
realGridSizePadded[ZZ] * sizeof(float);
configuration_.inputBufferSize = &inputBufferSize_;
configuration_.inputBufferStride[0] = realGridSizePadded[ZZ];
configuration_.inputBufferStride[1] =
realGridSizePadded[ZZ] * realGridSizePadded[YY];
configuration_.inputBufferStride[2] =
realGridSizePadded[ZZ] * realGridSizePadded[YY] * realGridSizePadded[XX];
VkFFTResult result = initializeVkFFT(&application_, configuration_);
handleFftError(result, "Initializing VkFFT");
// Allocate and copy data
float *realGrid;
hipMalloc(&realGrid, sizeInReals * sizeof(float));
float *complexGrid;
hipMalloc(&complexGrid, sizeInReals * sizeof(float));
hipError_t res = hipMemcpy(realGrid, inputdata, sizeInReals * sizeof(float),
hipMemcpyHostToDevice);
handleHipError(res);
application_.configuration.stream = &stream;
launchParams.inputBuffer = reinterpret_cast<void **>(&realGrid);
launchParams.buffer = reinterpret_cast<void **>(&complexGrid);
// Run FFT
result = VkFFTAppend(&application_, -1, &launchParams);
handleFftError(result, "VkFFT: Real to complex");
res = hipStreamSynchronize(stream);
handleHipError(res);
result = VkFFTAppend(&application_, 1, &launchParams);
handleFftError(result, "VkFFT: Complex to real");
res = hipStreamSynchronize(stream);
handleHipError(res);
// Check the results
std::vector<float> rtResult(sizeInReals);
res = hipMemcpy(rtResult.data(), realGrid, sizeInReals * sizeof(float),
hipMemcpyDeviceToHost);
const float normalizationConstant =
1.0 / (realGridSize[XX] * realGridSize[YY] * realGridSize[ZZ]);
for (int i = 0; i < realGridSize[XX] * realGridSize[YY]; i++) {
const float *expectedArr = inputdata + i * realGridSizePadded[ZZ];
const float *actualArr = rtResult.data() + i * realGridSizePadded[ZZ];
for (size_t j = 0; j < realGridSize[ZZ]; j++) {
float diff = expectedArr[j] - actualArr[j] * normalizationConstant;
if (std::fabs(diff) > 1e-5) {
std::cerr << "Fail at index {" << i << ", " << j << "}: got "
<< actualArr[j]*normalizationConstant << ", expected " << expectedArr[j] << std::endl;
}
}
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment